{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import folium\n",
    "import json\n",
    "import docx\n",
    "import docx2txt\n",
    "from konlpy.tag import Okt\n",
    "from collections import Counter\n",
    "import string\n",
    "import re\n",
    "from gensim import corpora\n",
    "from gensim.models import LdaModel\n",
    "from pprint import pprint\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use a font that contains Hangul glyphs so the Korean tick labels plotted later render.\n",
    "# NOTE(review): 'Malgun Gothic' must be installed on the machine running this notebook.\n",
    "plt.rc(\"font\", family = 'Malgun Gothic')\n",
    "# Draw negative numbers with an ASCII hyphen: the Unicode minus sign (U+2212) may be\n",
    "# missing from CJK fonts and would otherwise show up as an empty box on the axes.\n",
    "plt.rc('axes', unicode_minus=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Figure 7 - 18"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path of the Word document to analyse; its extracted text is stored in `text`.\n",
    "docx_path = r\"  .docx\"\n",
    "text = docx2txt.process(docx_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the topic bar charts: tokenize `text` (loaded in the previous cell), drop stop\n",
    "# words, fit an LDA model, and plot the top words of every topic in its own subplot.\n",
    "\n",
    "# --- Tunable settings ---\n",
    "num_topics = 6          # number of LDA topics (one subplot per topic)\n",
    "top_n_words = 20        # words shown per topic\n",
    "n_cols = 2              # subplot columns; the row count is derived from num_topics\n",
    "highlight_word = '정부'  # bars for this word are drawn in red, all others in grey\n",
    "lda_seed = 42           # fixed seed so the fitted topics (and the figure) are reproducible\n",
    "\n",
    "# Load stop words from the first column of the Excel file into a set.\n",
    "# NOTE(review): read_excel treats the first row as a header by default, so a stop word\n",
    "# stored in row 1 would be skipped - confirm the sheet really has a header row.\n",
    "stop_words_df = pd.read_excel(r'  .xlsx', engine='openpyxl')\n",
    "# Tokens are lower-cased before the membership test below, so the stop words are\n",
    "# normalised the same way; dropna() keeps empty cells from turning into the word 'nan'.\n",
    "stop_words_list = set(stop_words_df.iloc[:, 0].dropna().astype(str).str.strip().str.lower())\n",
    "\n",
    "# Initialize the Okt tokenizer\n",
    "tokenizer = Okt()\n",
    "\n",
    "# Perform morphological analysis and filter out postpositions and conjunctions\n",
    "excluded_tags = {'Josa', 'Conjunction'}\n",
    "tokens = [word for word, tag in tokenizer.pos(text) if tag not in excluded_tags]\n",
    "\n",
    "# A token is kept only if it starts with a Hangul syllable, Latin letter, or digit\n",
    "# (match() anchors at the start of the token only).\n",
    "pattern = re.compile(r'[가-힣A-Za-z0-9]+')\n",
    "\n",
    "# Further process tokens: remove invalid patterns and stop words\n",
    "processed_tokens = [\n",
    "    word for word in tokens\n",
    "    if pattern.match(word) and word.lower() not in stop_words_list\n",
    "]\n",
    "\n",
    "# Prepare data for LDA; the whole text is passed as a single document\n",
    "texts = [processed_tokens]\n",
    "dictionary = corpora.Dictionary(texts)\n",
    "corpus = [dictionary.doc2bow(doc) for doc in texts]\n",
    "\n",
    "# Apply LDA with a fixed random_state so re-running the cell yields the same topics\n",
    "lda_model = LdaModel(\n",
    "    corpus,\n",
    "    num_topics=num_topics,\n",
    "    id2word=dictionary,\n",
    "    passes=15,\n",
    "    random_state=lda_seed,\n",
    ")\n",
    "\n",
    "# Set up one subplot per topic; squeeze=False keeps `axes` 2-D for every grid size,\n",
    "# and the figure height scales with the row count (14 x 16 for the default 3 x 2 grid)\n",
    "n_rows = (num_topics + n_cols - 1) // n_cols\n",
    "fig, axes = plt.subplots(n_rows, n_cols, figsize=(7 * n_cols, 16 * n_rows / 3), squeeze=False)\n",
    "flat_axes = axes.ravel()\n",
    "\n",
    "# Bar Chart Visualization\n",
    "for t in range(lda_model.num_topics):\n",
    "    ax = flat_axes[t]\n",
    "\n",
    "    # Reverse the (word, weight) list returned by show_topic so that its first entry\n",
    "    # is drawn as the top bar of the horizontal chart\n",
    "    word_weights = lda_model.show_topic(t, top_n_words)[::-1]\n",
    "    words = [word for word, _ in word_weights]\n",
    "    weights = [weight for _, weight in word_weights]\n",
    "\n",
    "    # Set bar colors based on the word\n",
    "    colors = ['red' if word == highlight_word else 'grey' for word in words]\n",
    "\n",
    "    # Plot horizontal bars\n",
    "    ax.barh(words, weights, color=colors)\n",
    "\n",
    "    # Set title and adjust font sizes for ticks\n",
    "    ax.set_title(f'Topic {t+1}', fontsize=22)\n",
    "    ax.tick_params(axis='y', rotation=0, labelsize=13)\n",
    "    ax.tick_params(axis='x', labelsize=10)\n",
    "    ax.grid(axis='x')  # Add x-axis grid lines\n",
    "\n",
    "# Hide grid cells without a topic (only occurs when num_topics is not a multiple of n_cols)\n",
    "for unused_ax in flat_axes[lda_model.num_topics:]:\n",
    "    unused_ax.set_visible(False)\n",
    "\n",
    "# Compute the layout after drawing so the spacing accounts for titles and tick labels\n",
    "fig.tight_layout(pad=5.0)\n",
    "\n",
    "fig.savefig('figure_7.tiff', dpi=600, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
